# County-level presidential results
# (columns: state, county, candidate, party, total_votes, won).
vote_df <- read_csv(file = "./datasets/president_county_candidate.csv")
## Parsed with column specification:
## cols(
## state = col_character(),
## county = col_character(),
## candidate = col_character(),
## party = col_character(),
## total_votes = col_double(),
## won = col_logical()
## )
# State-level vote totals (columns: state, total_votes).
state_sum <- read_csv(file = "./datasets/president_state.csv")
## Parsed with column specification:
## cols(
## state = col_character(),
## total_votes = col_double()
## )
# State -> Region/Division lookup. `State` is renamed to `state` so the
# table shares a key name with the election data frames.
region_df <- rename(
  read_csv("./datasets/states.csv"),
  state = State
)
## Parsed with column specification:
## cols(
## State = col_character(),
## `State Code` = col_character(),
## Region = col_character(),
## Division = col_character()
## )
# County-level results annotated with state-level aggregates:
#   party_total  - votes for the row's party summed over the whole state
#   state_winner - TRUE when the row's party has the largest party_total in
#                  its state (NA if the comparison is NA)
#   state_total  - all votes cast in the state
# The result is returned ungrouped so later verbs do not silently operate
# per state.
election_winner_df <-
  read_csv("./datasets/president_county_candidate.csv") %>%
  group_by(state, party) %>%
  mutate(party_total = sum(total_votes)) %>%
  # group_by() replaces the previous grouping, so no ungroup() is needed here.
  group_by(state) %>%
  mutate(
    # The comparison already yields TRUE/FALSE/NA; no case_when() required.
    state_winner = party_total == max(party_total),
    state_total = sum(total_votes)
  ) %>%
  ungroup()
## Parsed with column specification:
## cols(
## state = col_character(),
## county = col_character(),
## candidate = col_character(),
## party = col_character(),
## total_votes = col_double(),
## won = col_logical()
## )
# One row per winning (state, candidate) pair, tagged with the state's Region.
winner_region <-
  election_winner_df %>%
  left_join(region_df) %>%
  filter(state_winner) %>%
  select(state, candidate, state_total, Region) %>%
  distinct()
## Joining, by = "state"
# Winning candidate per state, plus a lower-case `region` column used as the
# join key against the map polygons.
election_map_df <-
  election_winner_df %>%
  mutate(region = tolower(state)) %>%
  filter(state_winner) %>%
  select(state, candidate, party_total, state_total, region) %>%
  distinct()
# Polygon data for the "state" map, with the per-state winner attached.
usa_map <- map_data(map = "state")
usa_election_map <- usa_map %>% left_join(election_map_df)
## Joining, by = "region"
# Fill colour keyed by candidate name, as used by scale_fill_manual().
colors <- c("Donald Trump" = "red", "Joe Biden" = "blue")
# Map of the winning candidate per state. The `text` aesthetic is the value
# passed to ggplotly() as the tooltip.
election_result_map <-
  usa_election_map %>%
  ggplot(
    aes(
      x = long,
      y = lat,
      group = group,
      fill = candidate,
      text = paste(
        "State: ", state,
        "</br></br>Candidate: ", candidate,
        "</br>Votes: ", party_total,
        "</br>Winning Proportion: ", round(party_total / state_total, 2)
      )
    )
  ) +
  geom_polygon(color = "gray90", size = 0.1) +
  labs(title = "Election Results across states") +
  scale_fill_manual(values = colors) +
  theme_void() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  )

# Interactive version; only the `text` aesthetic is shown on hover.
ggplotly(p = election_result_map, tooltip = "text")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Tweets from the two Trump CSV exports, with the creation and user-join
# timestamps split into year / month / day / time columns and tagged
# hashtag = "Trump".
# NOTE(review): merge(all = TRUE) joins on every shared column (including the
# X1 index); confirm whether a plain row-bind of the two files was intended.
trump_df <-
  merge(
    x = read_csv("./datasets/trump1.csv"),
    y = read_csv("./datasets/trump2.csv"),
    all = TRUE
  ) %>%
  select(-X1) %>%
  separate(
    col = created_at,
    into = c("creation_date", "creation_time"),
    sep = " "
  ) %>%
  separate(
    col = creation_date,
    into = c("creation_year", "creation_month", "creation_day"),
    sep = "-"
  ) %>%
  separate(
    col = user_join_date,
    into = c("join_date", "join_time"),
    sep = " "
  ) %>%
  separate(
    col = join_date,
    into = c("join_year", "join_month", "join_day"),
    sep = "-"
  ) %>%
  mutate(hashtag = "Trump")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_character(),
## X1 = col_double(),
## created_at = col_datetime(format = ""),
## tweet_id = col_double(),
## likes = col_double(),
## retweet_count = col_double(),
## user_id = col_double(),
## user_join_date = col_datetime(format = ""),
## user_followers_count = col_double(),
## lat = col_double(),
## long = col_double(),
## collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_character(),
## X1 = col_double(),
## created_at = col_datetime(format = ""),
## tweet_id = col_double(),
## likes = col_double(),
## retweet_count = col_double(),
## user_id = col_double(),
## user_join_date = col_datetime(format = ""),
## user_followers_count = col_double(),
## lat = col_double(),
## long = col_double(),
## collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
# Tweets from the two Biden CSV exports, with the creation and user-join
# timestamps split into year / month / day / time columns and tagged
# hashtag = "Biden".
# NOTE(review): merge(all = TRUE) joins on every shared column (including the
# X1 index); confirm whether a plain row-bind of the two files was intended.
biden_df <-
  merge(
    x = read_csv("./datasets/biden1.csv"),
    y = read_csv("./datasets/biden2.csv"),
    all = TRUE
  ) %>%
  select(-X1) %>%
  separate(
    col = created_at,
    into = c("creation_date", "creation_time"),
    sep = " "
  ) %>%
  separate(
    col = creation_date,
    into = c("creation_year", "creation_month", "creation_day"),
    sep = "-"
  ) %>%
  separate(
    col = user_join_date,
    into = c("join_date", "join_time"),
    sep = " "
  ) %>%
  separate(
    col = join_date,
    into = c("join_year", "join_month", "join_day"),
    sep = "-"
  ) %>%
  mutate(hashtag = "Biden")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_character(),
## X1 = col_double(),
## created_at = col_datetime(format = ""),
## tweet_id = col_double(),
## likes = col_double(),
## retweet_count = col_double(),
## user_id = col_double(),
## user_join_date = col_datetime(format = ""),
## user_followers_count = col_double(),
## lat = col_double(),
## long = col_double(),
## collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_character(),
## X1 = col_double(),
## created_at = col_datetime(format = ""),
## tweet_id = col_double(),
## likes = col_double(),
## retweet_count = col_double(),
## user_id = col_double(),
## user_join_date = col_datetime(format = ""),
## user_followers_count = col_double(),
## lat = col_double(),
## long = col_double(),
## collected_at = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
# All Biden- and Trump-tagged tweets whose `country` is the United States.
#
# The two inputs carry different constant `hashtag` values, so an all-column
# merge(all = TRUE) can never match a row between them: it only produced the
# stacked rows, at the cost of a sort-merge over every column. bind_rows()
# yields the same set of rows directly (row order follows the inputs rather
# than merge()'s key ordering).
tweets_usa <-
  bind_rows(biden_df, trump_df) %>%
  filter(country == "United States of America")
# Polygon data for the "state" map.
usa_map <- map_data("state")

# Per-state comparison of the two hashtags.
#   likes_tweets - (sum of likes) * (number of tweets) for a state/hashtag pair
#   Biden, Trump - likes_tweets spread into one column per hashtag
#   top          - hashtag with the larger score; NA on an exact tie
# A hashtag with no score for a state is treated as 0 on BOTH sides of the
# comparison, so a state that only has Trump tweets is labelled "Trump"
# instead of falling through to NA.
tweet_map <- tweets_usa %>%
  group_by(state, hashtag) %>%
  summarise(
    count = n(),
    likes = sum(likes),
    # Drop grouping here instead of carrying a `state` grouping downstream.
    .groups = "drop"
  ) %>%
  mutate(
    likes_tweets = likes * count,
    region = tolower(state)
  ) %>%
  # `state` is kept explicitly; it is used later in the map tooltip.
  select(state, region, hashtag, likes_tweets) %>%
  pivot_wider(
    names_from = "hashtag",
    values_from = "likes_tweets"
  ) %>%
  mutate(
    top = case_when(
      coalesce(Biden, 0) > coalesce(Trump, 0) ~ "Biden",
      coalesce(Trump, 0) > coalesce(Biden, 0) ~ "Trump"
    )
  )
## `summarise()` regrouping output by 'state' (override with `.groups` argument)
## Adding missing grouping variables: `state`
# Attach the per-state tweet summary to the map polygons.
states_tweet_map <- usa_map %>% left_join(tweet_map)
## Joining, by = "region"
# Fill colour keyed by hashtag label, as used by scale_fill_manual().
colors <- c("Trump" = "red", "Biden" = "blue")
# Map of the leading hashtag (`top`) per state. The `text` aesthetic is the
# value passed to ggplotly() as the tooltip.
tweet_result_map <-
  states_tweet_map %>%
  ggplot(
    aes(
      x = long,
      y = lat,
      group = group,
      fill = top,
      text = paste(
        "State: ", state,
        "</br></br>Candidate: ", top
      )
    )
  ) +
  geom_polygon(color = "gray90", size = 0.1) +
  labs(title = "Tweets Results across states") +
  scale_fill_manual(values = colors) +
  theme_void() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom"
  )

# Interactive version; only the `text` aesthetic is shown on hover.
ggplotly(p = tweet_result_map, tooltip = "text")